library(tidyverse)
library(scales)
library(haven)
library(hrbrthemes)
library(extrafont)
library(cowplot)

# Load the replication data set (Stata format, read with haven).
#
# Run this script from the directory that holds the .dta files (for example by
# opening the project there or launching R from that folder). The former
# `setwd("")` placeholder was removed: an empty path makes setwd() stop with
# "cannot change working directory", which aborted the script before any data
# was read.
dat <- read_dta("master-for-replication.dta")

# Figure 1 ----------------------------------------------------------------

# Figure 1, left panel: weighted mean of the binary untrustworthy-site
# indicators, by vote-choice group and by slant of the sites.

# Stack the two binary indicators into long form: one row per respondent and
# slant ("con" = totalconfnbinary, "lib" = totallibfnbinary), plus the weight.
dat1 <- dat %>%
  select(voteNovember, fakebin = totalconfnbinary, weight) %>%
  mutate(slant = "con")
dat2 <- dat %>%
  select(voteNovember, fakebin = totallibfnbinary, weight) %>%
  mutate(slant = "lib")
# Rows with voteNovember above 2 (or missing) are excluded.
dat_both <- bind_rows(dat1, dat2) %>%
  filter(voteNovember <= 2)

# Weighted mean per group with a normal-approximation 95% interval.
ggdat <- dat_both %>%
  group_by(voteNovember, slant) %>%
  summarize(
    y = weighted.mean(fakebin, w = weight, na.rm = TRUE),
    # Despite its name, yvar holds a weighted standard error, not a variance.
    yvar = diagis::weighted_se(fakebin, w = weight, na.rm = TRUE),
    ymin = y - 1.96 * yvar,
    ymax = y + 1.96 * yvar
  ) %>%
  # summarize() leaves the result grouped by voteNovember; drop the grouping
  # so the following steps act on the whole table.
  ungroup() %>%
  mutate(
    support = case_when(
      voteNovember == 1 ~ "Clinton supporters",
      voteNovember == 2 ~ "Trump supporters"
    ),
    # "lib" must be the first level so that it pairs with the first entries of
    # `labels` / `values` in scale_shape_manual() below.
    slant = relevel(as_factor(slant), ref = "lib")
  )

g <- ggplot(
  data = ggdat,
  aes(x = support, y = y, ymin = ymin, ymax = ymax, shape = slant)
) +
  geom_pointrange(fatten = 2.8, position = position_dodge(width = .3)) +
  # guide = "none" hides this panel's legend. The older `guide = FALSE`
  # spelling is deprecated and warns in ggplot2 >= 3.3.4.
  scale_shape_manual(
    " Untrustworthy\n news type",
    labels = c("pro-Clinton", "pro-Trump"),
    values = c(2, 16),
    guide = "none"
  ) +
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_y_continuous(
    "Mean % consuming untrustworthy websites",
    limits = c(0, 1),
    labels = percent
  ) +
  xlab("")

gnew <- g + theme_ipsum_rc(grid = "Y")
gnew

# Figure 1, right panel: share of each respondent's visits going to
# untrustworthy sites of either slant, shown as raw points plus weighted means.

# Per-respondent shares: slanted untrustworthy counts over
# totalnewsfncount_grinberg. These columns are reused by later figures.
dat <- dat %>%
  mutate(
    propcon = totalconfncount / totalnewsfncount_grinberg,
    proplib = totallibfncount / totalnewsfncount_grinberg
  )

# Long form: one row per respondent and slant.
dat1 <- dat %>%
  select(voteNovember, fake = propcon, weight) %>%
  mutate(slant = "con")
dat2 <- dat %>%
  select(voteNovember, fake = proplib, weight) %>%
  mutate(slant = "lib")
dat_both <- bind_rows(dat1, dat2) %>%
  filter(voteNovember <= 2) %>%
  mutate(
    support = case_when(
      voteNovember == 1 ~ "Clinton supporters",
      voteNovember == 2 ~ "Trump supporters"
    )
  )

# Weighted group means with +/- 1.96 * SE bounds (yvar is the weighted SE).
ggdat <- dat_both %>%
  group_by(voteNovember, slant) %>%
  summarise(
    y = weighted.mean(fake, w = weight, na.rm = TRUE),
    yvar = diagis::weighted_se(fake, w = weight, na.rm = TRUE),
    ymin = y - 1.96 * yvar,
    ymax = y + 1.96 * yvar
  ) %>%
  mutate(
    support = case_when(
      voteNovember == 1 ~ "Clinton supporters",
      voteNovember == 2 ~ "Trump supporters"
    )
  )

# Same level order ("lib" first) in both tables so the shape scale maps
# consistently across the raw-point and summary layers.
ggdat$slant <- relevel(as_factor(ggdat$slant), ref = "lib")
dat_both$slant <- relevel(as_factor(dat_both$slant), ref = "lib")

g <- ggplot(dat_both, aes(x = support, y = fake, shape = slant)) +
  # Raw respondent-level shares, jittered horizontally.
  geom_point(
    color = "gray50",
    alpha = 0.2,
    position = position_jitter(width = 0.15)
  ) +
  # Group summaries; x and shape are inherited from the plot-level mapping.
  geom_pointrange(
    data = ggdat,
    aes(y = y, ymin = ymin, ymax = ymax),
    fatten = 2.8,
    position = position_dodge(width = 0.3)
  ) +
  scale_shape_manual(
    " Untrustworthy\n news type",
    values = c(2, 16),
    labels = c("pro-Clinton", "pro-Trump")
  ) +
  scale_y_continuous(
    "Mean information diet share",
    labels = percent,
    limits = c(0, 1)
  ) +
  labs(x = "")

gnew2 <- g + theme_ipsum_rc(grid = "Y")
gnew2

# Place both Figure 1 panels side by side.
fig1 <- plot_grid(
  gnew, gnew2,
  nrow = 1,
  rel_widths = c(5, 7),
  align = "v",
  axis = "t"
)
fig1
# ggsave("figure1_combined.png", fig1, width = 11, height = 5)

# Figure 2 ----------------------------------------------------------------

# Figure 2, left panel: weighted mean of the binary untrustworthy-site
# indicators, by media-diet slant decile and by slant of the sites.

# Long form: one row per respondent and slant
# ("con" = totalconfnbinary, "lib" = totallibfnbinary), plus the weight.
dat1 <- dat %>%
  select(decile, fakebin = totalconfnbinary, weight) %>%
  mutate(slant = "con")
dat2 <- dat %>%
  select(decile, fakebin = totallibfnbinary, weight) %>%
  mutate(slant = "lib")
# Respondents without a decile cannot be placed on the x axis.
dat_both <- bind_rows(dat1, dat2) %>%
  filter(!is.na(decile))

# Weighted mean per decile and slant with a normal-approximation 95% interval.
deciles <- dat_both %>%
  group_by(decile, slant) %>%
  summarize(
    y = weighted.mean(fakebin, w = weight, na.rm = TRUE),
    # Despite its name, yvar holds a weighted standard error, not a variance.
    yvar = diagis::weighted_se(fakebin, w = weight, na.rm = TRUE),
    ymin = y - 1.96 * yvar,
    ymax = y + 1.96 * yvar
  ) %>%
  # summarize() leaves the result grouped by decile; drop the grouping so the
  # factor conversion below is computed over the whole column.
  ungroup() %>%
  # "lib" first so it pairs with the first entries of `labels` / `values`.
  mutate(slant = relevel(as_factor(slant), ref = "lib"))

g <- ggplot(
  data = deciles,
  aes(x = decile, y = y, ymin = ymin, ymax = ymax, shape = slant)
) +
  geom_pointrange(fatten = 2.8, position = position_dodge(width = .3)) +
  # guide = "none" hides this panel's legend. The older `guide = FALSE`
  # spelling is deprecated and warns in ggplot2 >= 3.3.4.
  scale_shape_manual(
    " Untrustworthy\n news type",
    labels = c("pro-Clinton", "pro-Trump"),
    values = c(2, 16),
    guide = "none"
  ) +
  # The lower limit sits just below zero; presumably so intervals that dip
  # marginally under 0 are not dropped as out of bounds (TODO confirm).
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_y_continuous(
    "Mean % consuming untrustworthy websites",
    limits = c(-.0008, 1),
    labels = percent
  ) +
  scale_x_continuous(breaks = 1:10) +
  xlab("Average media diet slant decile (liberal to conservative)")

gnew <- g + theme_ipsum_rc(grid = "Y")
gnew

# Figure 2, right panel: per-respondent untrustworthy-site shares (propcon /
# proplib) by slant decile, with weighted means overlaid.

# Long form: one row per respondent and slant, dropping missing deciles.
dat1 <- dat %>%
  select(decile, fake = propcon, weight) %>%
  mutate(slant = "con")
dat2 <- dat %>%
  select(decile, fake = proplib, weight) %>%
  mutate(slant = "lib")
dat_both <- filter(bind_rows(dat1, dat2), !is.na(decile))

# Weighted means per decile and slant with +/- 1.96 * SE bounds
# (yvar is the weighted SE).
deciles <- dat_both %>%
  group_by(decile, slant) %>%
  summarise(
    y = weighted.mean(fake, w = weight, na.rm = TRUE),
    yvar = diagis::weighted_se(fake, w = weight, na.rm = TRUE),
    ymin = y - 1.96 * yvar,
    ymax = y + 1.96 * yvar
  )

# Same level order ("lib" first) in both tables so the shape scale maps
# consistently across the raw-point and summary layers.
deciles$slant <- relevel(as_factor(deciles$slant), ref = "lib")
dat_both$slant <- relevel(as_factor(dat_both$slant), ref = "lib")

g <- ggplot(dat_both, aes(x = decile, y = fake, shape = slant)) +
  # Raw respondent-level shares, jittered horizontally.
  geom_point(
    color = "gray50",
    alpha = 0.2,
    position = position_jitter(width = 0.15)
  ) +
  # Group summaries; x and shape are inherited from the plot-level mapping.
  geom_pointrange(
    data = deciles,
    aes(y = y, ymin = ymin, ymax = ymax),
    fatten = 2.8,
    position = position_dodge(width = 0.3)
  ) +
  scale_shape_manual(
    " Untrustworthy\n news type",
    values = c(2, 16),
    labels = c("pro-Clinton", "pro-Trump")
  ) +
  scale_y_continuous(
    "Mean information diet share",
    labels = percent,
    limits = c(-.0008, 1)
  ) +
  scale_x_continuous(breaks = seq_len(10)) +
  labs(x = "Average media diet slant decile (liberal to conservative)")

gnew2 <- g + theme_ipsum_rc(grid = "Y")
gnew2

# Place both Figure 2 panels side by side.
fig2 <- plot_grid(
  gnew, gnew2,
  nrow = 1,
  rel_widths = c(5, 7),
  align = "v",
  axis = "t"
)
fig2
# ggsave("figure2_combined.png", fig2, width = 11, height = 5)


# Figure 3 ----------------------------------------------------------------

# Figure 3, left panel: weighted mean of totalconfnbinary by vote-choice group
# and CRT level.

# Fold crt value 3 into 2. This modifies `dat` itself, so the recode also
# applies to every later use of dat$crt. Presumably this leaves the three
# levels labelled Low / Medium / High below (TODO confirm the coding of crt).
dat$crt <- ifelse(dat$crt == 3, 2, dat$crt)

# Rows with voteNovember above 2 (or missing) are excluded.
dat_both <- dat %>%
  select(voteNovember, crt, fakebin = totalconfnbinary, weight) %>%
  filter(voteNovember <= 2)

# Weighted mean per group with a normal-approximation 95% interval.
ggdat <- dat_both %>%
  group_by(voteNovember, crt) %>%
  summarize(
    y = weighted.mean(fakebin, w = weight, na.rm = TRUE),
    # Despite its name, yvar holds a weighted standard error, not a variance.
    yvar = diagis::weighted_se(fakebin, w = weight, na.rm = TRUE),
    ymin = y - 1.96 * yvar,
    ymax = y + 1.96 * yvar
  ) %>%
  # summarize() leaves the result grouped by voteNovember; drop the grouping
  # so the following steps act on the whole table.
  ungroup() %>%
  mutate(
    support = case_when(
      zap_labels(voteNovember) == 1 ~ "Clinton supporters",
      zap_labels(voteNovember) == 2 ~ "Trump supporters"
    )
  ) %>%
  # The summary row for respondents with missing crt is not plotted.
  filter(!is.na(crt)) %>%
  mutate(crt = as_factor(crt))

g <- ggplot(
  data = ggdat,
  aes(x = support, y = y, ymin = ymin, ymax = ymax, shape = crt)
) +
  geom_pointrange(fatten = 2.8, position = position_dodge(width = .3)) +
  # guide = "none" hides this panel's legend. The older `guide = FALSE`
  # spelling is deprecated and warns in ggplot2 >= 3.3.4.
  scale_shape_manual(
    "",
    labels = c("Low CRT", "Medium CRT", "High CRT"),
    values = c(17, 16, 15),
    guide = "none"
  ) +
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_y_continuous(
    "Mean % consuming any untrustworthy conservative websites",
    limits = c(0, 1),
    labels = percent
  ) +
  xlab("")

gnew <- g + theme_ipsum_rc(grid = "Y")
gnew

# Figure 3, right panel: per-respondent propcon shares by vote-choice group
# and CRT level, with weighted means overlaid.

# Keep rows with voteNovember <= 2 and a non-missing crt, then label the
# vote-choice groups.
dat_both <- dat %>%
  select(voteNovember, crt, fake = propcon, weight) %>%
  filter(voteNovember <= 2, !is.na(crt)) %>%
  mutate(
    support = case_when(
      voteNovember == 1 ~ "Clinton supporters",
      voteNovember == 2 ~ "Trump supporters"
    )
  )

# Weighted means per group with +/- 1.96 * SE bounds (yvar is the weighted SE).
ggdat <- dat_both %>%
  group_by(voteNovember, crt) %>%
  summarise(
    y = weighted.mean(fake, w = weight, na.rm = TRUE),
    yvar = diagis::weighted_se(fake, w = weight, na.rm = TRUE),
    ymin = y - 1.96 * yvar,
    ymax = y + 1.96 * yvar
  ) %>%
  mutate(
    support = case_when(
      zap_labels(voteNovember) == 1 ~ "Clinton supporters",
      zap_labels(voteNovember) == 2 ~ "Trump supporters"
    )
  )

# crt is mapped to shape, so it has to be discrete in both tables.
ggdat$crt <- as_factor(ggdat$crt)
dat_both$crt <- as_factor(dat_both$crt)

g <- ggplot(dat_both, aes(x = support, y = fake, shape = crt)) +
  # Raw respondent-level shares, jittered horizontally.
  geom_point(
    color = "gray50",
    alpha = 0.2,
    position = position_jitter(width = 0.15)
  ) +
  # Group summaries; x and shape are inherited from the plot-level mapping.
  geom_pointrange(
    data = ggdat,
    aes(y = y, ymin = ymin, ymax = ymax),
    fatten = 2.8,
    position = position_dodge(width = 0.3)
  ) +
  scale_shape_manual(
    "",
    values = c(17, 16, 15),
    labels = c("Low CRT", "Medium CRT", "High CRT")
  ) +
  scale_y_continuous(
    "Mean information diet share",
    labels = percent,
    limits = c(0, 1)
  ) +
  labs(x = "")

gnew2 <- g + theme_ipsum_rc(grid = "Y")
gnew2

# Place both Figure 3 panels side by side.
fig3 <- plot_grid(
  gnew, gnew2,
  nrow = 1,
  rel_widths = c(5, 7),
  align = "v",
  axis = "t"
)
fig3
# ggsave("figure3_combined.png", fig3, width = 11, height = 5)


# Figure 4 / S6 ----------------------------------------------------------------

# Grouped bar chart of referrer shares by content type. The commented-out
# read_dta() lines load alternative input files; presumably these pair with
# the commented-out SM_referrers15 / SM_referrers45 outputs at the bottom
# (TODO confirm).
# ref <- read_dta("referrer-sum-stats-grinberg-15.dta")
# ref <- read_dta("referrer-sum-stats-grinberg-45.dta")
ref <- read_dta("referrer-sum-stats-grinberg-30.dta")

# Rename the five columns positionally and attach display labels for the rows;
# this assumes the file has exactly five columns and three rows in this order.
names(ref) <- c("infotype", "Facebook", "Google", "Twitter", "Webmail")
ref$fakelab <- c("Untrustworthy websites", "Hard news", "Neither")

# Long form: one row per content type and referrer, with its proportion.
# as_factor() keeps the content types in order of first appearance.
ref <- ref %>%
  select(-infotype) %>%
  pivot_longer(-fakelab, names_to = "pathway", values_to = "prop") %>%
  mutate(fakelab = as_factor(fakelab))

g <- ggplot(ref, aes(x = pathway, y = prop, fill = fakelab)) +
  # geom_col() draws bar heights straight from `prop`.
  geom_col(position = position_dodge(), color = "black") +
  scale_fill_brewer("", palette = "Greys") +
  scale_y_continuous(
    "Estimated % of visits originating from referrer",
    labels = percent_format(accuracy = 5L),
    limits = c(0, 0.3)
  ) +
  labs(x = "")

gnew <- g + theme_ipsum_rc(grid = "Y")
gnew

# ggsave("figure4.png", width = 8, height = 6)
# ggsave("SM_referrers15.png", width = 8, height = 6)
# ggsave("SM_referrers45.png", width = 8, height = 6)

